
import os
import random 

# Fix the random seed so that repeated runs draw the same random samples.
random.seed(0)

# Common root of the dataset; the three paths below are built from it.
_DATASET_ROOT = r'D:\dataset\DatasetId_1864811_1687846626'

# Directory that holds the XML annotation files.
xmlpath = _DATASET_ROOT + r'\Annotations/'

# Directory where the generated list files (train.txt, valid.txt, ...) are
# written.
saveBasePath = _DATASET_ROOT + r'\data/'

# Directory prefix of the dataset images; it is prepended verbatim to every
# image name written to the list files (a relative path is recommended).
imgBasePathValid = _DATASET_ROOT + r'\images/'

# Fraction of all annotated samples that goes into the train+valid pool
# (1 means the whole dataset, leaving nothing for the test list).
trainval_percent = 1

# Fraction of the train+valid pool that is used for training.
train_percent = 0.8
 
# Collect the annotation file names. os.listdir() returns entries in
# arbitrary order, so sort them: without a stable order the fixed random
# seed would not produce the same split on every run/machine.
total_xml = sorted(
    entry for entry in os.listdir(xmlpath) if entry.endswith(".xml")
)

num = len(total_xml)
tv = int(num * trainval_percent)  # number of samples in the train+valid pool
tr = int(tv * train_percent)      # number of samples used for training

# Draw the train+valid indices from all samples, then the training indices
# from that pool. Everything not in the pool goes to the test list.
trainval = random.sample(range(num), tv)
train = random.sample(trainval, tr)

# Sets give O(1) membership tests inside the write loop below.
trainval_ids = set(trainval)
train_ids = set(train)

print("数据集总数：", tv)
print("训练集总数：", tr)

# Extension of the dataset images; change to '.jpg' if the images are JPEGs.
image_ext = '.png'

# Make sure the output directory exists before creating the list files.
os.makedirs(saveBasePath, exist_ok=True)

# The context manager closes all four files even if writing fails midway.
with open(os.path.join(saveBasePath, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(saveBasePath, 'test.txt'), 'w') as ftest, \
        open(os.path.join(saveBasePath, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(saveBasePath, 'valid.txt'), 'w') as fval:
    for i, xml_name in enumerate(total_xml):
        # Strip the ".xml" suffix to get the sample's base name.
        name = os.path.splitext(xml_name)[0]
        image_path = imgBasePathValid + name + image_ext + '\n'

        if i in trainval_ids:
            ftrainval.write(image_path)
            # Each pooled sample goes to exactly one of train / valid.
            if i in train_ids:
                ftrain.write(image_path)
            else:
                fval.write(image_path)
        else:
            ftest.write(image_path)
